/*-------------------------------------------------------------------------
 *
 * syslogger.c
 *
 * The system logger (syslogger) appeared in Postgres 8.0. It catches all
 * stderr output from the postmaster, backends, and other subprocesses
 * by redirecting to a pipe, and writes it to a set of logfiles.
 * It's possible to have size and age limits for the logfile configured
 * in postgresql.conf. If these limits are reached or passed, the
 * current logfile is closed and a new one is created (rotated).
 * The logfiles are stored in a subdirectory (configurable in
 * postgresql.conf), using a user-selectable naming scheme.
 *
 * Author: Andreas Pflug <pgadmin@pse-consulting.de>
 *
 * Copyright (c) 2004-2017, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *      src/backend/postmaster/syslogger.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/time.h>

#include "lib/stringinfo.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "nodes/pg_list.h"
#include "pgstat.h"
#include "pgtime.h"
#include "postmaster/fork_process.h"
#include "postmaster/postmaster.h"
#include "postmaster/syslogger.h"
#include "storage/dsm.h"
#include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/pg_shmem.h"
#include "utils/guc.h"
#include "utils/ps_status.h"
#include "utils/timestamp.h"

/*
 * We read() into a temp buffer twice as big as a chunk, so that any fragment
 * left after processing can be moved down to the front and we'll still have
 * room to read a full chunk.
 */
#define READ_BUF_SIZE (2 * PIPE_CHUNK_SIZE)


/*
 * GUC parameters.  Logging_collector cannot be changed after postmaster
 * start, but the rest can change at SIGHUP.
 */
bool        Logging_collector = false;
int            Log_RotationAge = HOURS_PER_DAY * MINS_PER_HOUR;
int            Log_RotationSize = 10 * 1024;
char       *Log_directory = NULL;
char       *Log_filename = NULL;
bool        Log_truncate_on_rotation = false;
int            Log_file_mode = S_IRUSR | S_IWUSR;

/*
 * Globally visible state (used by elog.c)
 */
bool        am_syslogger = false;

extern bool redirection_done;

/*
 * Private state
 */
static pg_time_t next_rotation_time;
static bool pipe_eof_seen = false;
static bool rotation_disabled = false;
static FILE *syslogFile = NULL;
static FILE *csvlogFile = NULL;
NON_EXEC_STATIC pg_time_t first_syslogger_file_time = 0;
static char *last_file_name = NULL;
static char *last_csv_file_name = NULL;

/*
 * Buffers for saving partial messages from different backends.
 *
 * Keep NBUFFER_LISTS lists of these, with the entry for a given source pid
 * being in the list numbered (pid % NBUFFER_LISTS), so as to cut down on
 * the number of entries we have to examine for any one incoming message.
 * There must never be more than one entry for the same source pid.
 *
 * An inactive buffer is not removed from its list, just held for re-use.
 * An inactive buffer has pid == 0 and undefined contents of data.
 */
typedef struct
{
    int32        pid;            /* PID of source process */
    StringInfoData data;        /* accumulated data, as a StringInfo */
} save_buffer;

#define NBUFFER_LISTS 256
static List *buffer_lists[NBUFFER_LISTS];

/* These must be exported for EXEC_BACKEND case ... annoying */
#ifndef WIN32
int            syslogPipe[2] = {-1, -1};
#else
HANDLE        syslogPipe[2] = {0, 0};
#endif

#ifdef WIN32
static HANDLE threadHandle = 0;
static CRITICAL_SECTION sysloggerSection;
#endif

/*
 * Flags set by interrupt handlers for later service in the main loop.
 */
static volatile sig_atomic_t got_SIGHUP = false;
static volatile sig_atomic_t rotation_requested = false;


/* Local subroutines */
#ifdef EXEC_BACKEND
static pid_t syslogger_forkexec(void);
static void syslogger_parseArgs(int argc, char *argv[]);
#endif
NON_EXEC_STATIC void SysLoggerMain(int argc, char *argv[]) pg_attribute_noreturn();
static void process_pipe_input(char *logbuffer, int *bytes_in_logbuffer);
static void flush_pipe_input(char *logbuffer, int *bytes_in_logbuffer);
static void open_csvlogfile(void);
static FILE *logfile_open(const char *filename, const char *mode,
             bool allow_errors);

#ifdef WIN32
static unsigned int __stdcall pipeThread(void *arg);
#endif
static void logfile_rotate(bool time_based_rotation, int size_rotation_for);
static char *logfile_getname(pg_time_t timestamp, const char *suffix);
static void set_next_rotation_time(void);
static void sigHupHandler(SIGNAL_ARGS);
static void sigUsr1Handler(SIGNAL_ARGS);
static void update_metainfo_datafile(void);


/*
 * Main entry point for syslogger process
 * argc/argv parameters are valid only in EXEC_BACKEND case.
 */
NON_EXEC_STATIC void
SysLoggerMain(int argc, char *argv[])
{// #lizard forgives
#ifndef WIN32
    char        logbuffer[READ_BUF_SIZE];
    int            bytes_in_logbuffer = 0;
#endif
    char       *currentLogDir;
    char       *currentLogFilename;
    int            currentLogRotationAge;
    pg_time_t    now;

    now = MyStartTime;

#ifdef EXEC_BACKEND
    syslogger_parseArgs(argc, argv);
#endif                            /* EXEC_BACKEND */

    am_syslogger = true;

    init_ps_display("logger process", "", "", "");

    /*
     * If we restarted, our stderr is already redirected into our own input
     * pipe.  This is of course pretty useless, not to mention that it
     * interferes with detecting pipe EOF.  Point stderr to /dev/null. This
     * assumes that all interesting messages generated in the syslogger will
     * come through elog.c and will be sent to write_syslogger_file.
     */
    if (redirection_done)
    {
        int            fd = open(DEVNULL, O_WRONLY, 0);

        /*
         * The closes might look redundant, but they are not: we want to be
         * darn sure the pipe gets closed even if the open failed.  We can
         * survive running with stderr pointing nowhere, but we can't afford
         * to have extra pipe input descriptors hanging around.
         *
         * As we're just trying to reset these to go to DEVNULL, there's not
         * much point in checking for failure from the close/dup2 calls here,
         * if they fail then presumably the file descriptors are closed and
         * any writes will go into the bitbucket anyway.
         */
        close(fileno(stdout));
        close(fileno(stderr));
        if (fd != -1)
        {
            (void) dup2(fd, fileno(stdout));
            (void) dup2(fd, fileno(stderr));
            close(fd);
        }
    }

    /*
     * Syslogger's own stderr can't be the syslogPipe, so set it back to text
     * mode if we didn't just close it. (It was set to binary in
     * SubPostmasterMain).
     */
#ifdef WIN32
    else
        _setmode(_fileno(stderr), _O_TEXT);
#endif

    /*
     * Also close our copy of the write end of the pipe.  This is needed to
     * ensure we can detect pipe EOF correctly.  (But note that in the restart
     * case, the postmaster already did this.)
     */
#ifndef WIN32
    if (syslogPipe[1] >= 0)
        close(syslogPipe[1]);
    syslogPipe[1] = -1;
#else
    if (syslogPipe[1])
        CloseHandle(syslogPipe[1]);
    syslogPipe[1] = 0;
#endif

    /*
     * Properly accept or ignore signals the postmaster might send us
     *
     * Note: we ignore all termination signals, and instead exit only when all
     * upstream processes are gone, to ensure we don't miss any dying gasps of
     * broken backends...
     */

    pqsignal(SIGHUP, sigHupHandler);    /* set flag to read config file */
    pqsignal(SIGINT, SIG_IGN);
    pqsignal(SIGTERM, SIG_IGN);
    pqsignal(SIGQUIT, SIG_IGN);
    pqsignal(SIGALRM, SIG_IGN);
    pqsignal(SIGPIPE, SIG_IGN);
    pqsignal(SIGUSR1, sigUsr1Handler);    /* request log rotation */
    pqsignal(SIGUSR2, SIG_IGN);

    /*
     * Reset some signals that are accepted by postmaster but not here
     */
    pqsignal(SIGCHLD, SIG_DFL);
    pqsignal(SIGTTIN, SIG_DFL);
    pqsignal(SIGTTOU, SIG_DFL);
    pqsignal(SIGCONT, SIG_DFL);
    pqsignal(SIGWINCH, SIG_DFL);

    PG_SETMASK(&UnBlockSig);

#ifdef WIN32
    /* Fire up separate data transfer thread */
    InitializeCriticalSection(&sysloggerSection);
    EnterCriticalSection(&sysloggerSection);

    threadHandle = (HANDLE) _beginthreadex(NULL, 0, pipeThread, NULL, 0, NULL);
    if (threadHandle == 0)
        elog(FATAL, "could not create syslogger data transfer thread: %m");
#endif                            /* WIN32 */

    /*
     * Remember active logfile's name.  We recompute this from the reference
     * time because passing down just the pg_time_t is a lot cheaper than
     * passing a whole file path in the EXEC_BACKEND case.
     */
    last_file_name = logfile_getname(first_syslogger_file_time, NULL);

    /* remember active logfile parameters */
    currentLogDir = pstrdup(Log_directory);
    currentLogFilename = pstrdup(Log_filename);
    currentLogRotationAge = Log_RotationAge;
    /* set next planned rotation time */
    set_next_rotation_time();
    update_metainfo_datafile();

    /* main worker loop */
    for (;;)
    {
        bool        time_based_rotation = false;
        int            size_rotation_for = 0;
        long        cur_timeout;
        int            cur_flags;

#ifndef WIN32
        int            rc;
#endif

        /* Clear any already-pending wakeups */
        ResetLatch(MyLatch);

        /*
         * Process any requests or signals received recently.
         */
        if (got_SIGHUP)
        {
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);

            /*
             * Check if the log directory or filename pattern changed in
             * postgresql.conf. If so, force rotation to make sure we're
             * writing the logfiles in the right place.
             */
            if (strcmp(Log_directory, currentLogDir) != 0)
            {
                pfree(currentLogDir);
                currentLogDir = pstrdup(Log_directory);
                rotation_requested = true;

                /*
                 * Also, create new directory if not present; ignore errors
                 */
                mkdir(Log_directory, S_IRWXU);
            }
            if (strcmp(Log_filename, currentLogFilename) != 0)
            {
                pfree(currentLogFilename);
                currentLogFilename = pstrdup(Log_filename);
                rotation_requested = true;
            }

            /*
             * If rotation time parameter changed, reset next rotation time,
             * but don't immediately force a rotation.
             */
            if (currentLogRotationAge != Log_RotationAge)
            {
                currentLogRotationAge = Log_RotationAge;
                set_next_rotation_time();
            }

            /*
             * If we had a rotation-disabling failure, re-enable rotation
             * attempts after SIGHUP, and force one immediately.
             */
            if (rotation_disabled)
            {
                rotation_disabled = false;
                rotation_requested = true;
            }

            /*
             * Force rewriting last log filename when reloading configuration.
             * Even if rotation_requested is false, log_destination may have
             * been changed and we don't want to wait the next file rotation.
             */
            update_metainfo_datafile();
        }

        if (Log_RotationAge > 0 && !rotation_disabled)
        {
            /* Do a logfile rotation if it's time */
            now = (pg_time_t) time(NULL);
            if (now >= next_rotation_time)
                rotation_requested = time_based_rotation = true;
        }

        if (!rotation_requested && Log_RotationSize > 0 && !rotation_disabled)
        {
            /* Do a rotation if file is too big */
            if (ftell(syslogFile) >= Log_RotationSize * 1024L)
            {
                rotation_requested = true;
                size_rotation_for |= LOG_DESTINATION_STDERR;
            }
            if (csvlogFile != NULL &&
                ftell(csvlogFile) >= Log_RotationSize * 1024L)
            {
                rotation_requested = true;
                size_rotation_for |= LOG_DESTINATION_CSVLOG;
            }
        }

        if (rotation_requested)
        {
            /*
             * Force rotation when both values are zero. It means the request
             * was sent by pg_rotate_logfile.
             */
            if (!time_based_rotation && size_rotation_for == 0)
                size_rotation_for = LOG_DESTINATION_STDERR | LOG_DESTINATION_CSVLOG;
            logfile_rotate(time_based_rotation, size_rotation_for);
        }

        /*
         * Calculate time till next time-based rotation, so that we don't
         * sleep longer than that.  We assume the value of "now" obtained
         * above is still close enough.  Note we can't make this calculation
         * until after calling logfile_rotate(), since it will advance
         * next_rotation_time.
         *
         * Also note that we need to beware of overflow in calculation of the
         * timeout: with large settings of Log_RotationAge, next_rotation_time
         * could be more than INT_MAX msec in the future.  In that case we'll
         * wait no more than INT_MAX msec, and try again.
         */
        if (Log_RotationAge > 0 && !rotation_disabled)
        {
            pg_time_t    delay;

            delay = next_rotation_time - now;
            if (delay > 0)
            {
                if (delay > INT_MAX / 1000)
                    delay = INT_MAX / 1000;
                cur_timeout = delay * 1000L;    /* msec */
            }
            else
                cur_timeout = 0;
            cur_flags = WL_TIMEOUT;
        }
        else
        {
            cur_timeout = -1L;
            cur_flags = 0;
        }

        /*
         * Sleep until there's something to do
         */
#ifndef WIN32
        rc = WaitLatchOrSocket(MyLatch,
                               WL_LATCH_SET | WL_SOCKET_READABLE | cur_flags,
                               syslogPipe[0],
                               cur_timeout,
                               WAIT_EVENT_SYSLOGGER_MAIN);

        if (rc & WL_SOCKET_READABLE)
        {
            int            bytesRead;

            bytesRead = read(syslogPipe[0],
                             logbuffer + bytes_in_logbuffer,
                             sizeof(logbuffer) - bytes_in_logbuffer);
            if (bytesRead < 0)
            {
                if (errno != EINTR)
                    ereport(LOG,
                            (errcode_for_socket_access(),
                             errmsg("could not read from logger pipe: %m")));
            }
            else if (bytesRead > 0)
            {
                bytes_in_logbuffer += bytesRead;
                process_pipe_input(logbuffer, &bytes_in_logbuffer);
                continue;
            }
            else
            {
                /*
                 * Zero bytes read when select() is saying read-ready means
                 * EOF on the pipe: that is, there are no longer any processes
                 * with the pipe write end open.  Therefore, the postmaster
                 * and all backends are shut down, and we are done.
                 */
                pipe_eof_seen = true;

                /* if there's any data left then force it out now */
                flush_pipe_input(logbuffer, &bytes_in_logbuffer);
            }
        }
#else                            /* WIN32 */

        /*
         * On Windows we leave it to a separate thread to transfer data and
         * detect pipe EOF.  The main thread just wakes up to handle SIGHUP
         * and rotation conditions.
         *
         * Server code isn't generally thread-safe, so we ensure that only one
         * of the threads is active at a time by entering the critical section
         * whenever we're not sleeping.
         */
        LeaveCriticalSection(&sysloggerSection);

        (void) WaitLatch(MyLatch,
                         WL_LATCH_SET | cur_flags,
                         cur_timeout,
                         WAIT_EVENT_SYSLOGGER_MAIN);

        EnterCriticalSection(&sysloggerSection);
#endif                            /* WIN32 */

        if (pipe_eof_seen)
        {
            /*
             * seeing this message on the real stderr is annoying - so we make
             * it DEBUG1 to suppress in normal use.
             */
            ereport(DEBUG1,
                    (errmsg("logger shutting down")));

            /*
             * Normal exit from the syslogger is here.  Note that we
             * deliberately do not close syslogFile before exiting; this is to
             * allow for the possibility of elog messages being generated
             * inside proc_exit.  Regular exit() will take care of flushing
             * and closing stdio channels.
             */
            proc_exit(0);
        }
    }
}

/*
 * Postmaster subroutine to start a syslogger subprocess.
 */
int
SysLogger_Start(void)
{// #lizard forgives
    pid_t        sysloggerPid;
    char       *filename;

    if (!Logging_collector)
        return 0;

    /*
     * If first time through, create the pipe which will receive stderr
     * output.
     *
     * If the syslogger crashes and needs to be restarted, we continue to use
     * the same pipe (indeed must do so, since extant backends will be writing
     * into that pipe).
     *
     * This means the postmaster must continue to hold the read end of the
     * pipe open, so we can pass it down to the reincarnated syslogger. This
     * is a bit klugy but we have little choice.
     */
#ifndef WIN32
    if (syslogPipe[0] < 0)
    {
        if (pipe(syslogPipe) < 0)
            ereport(FATAL,
                    (errcode_for_socket_access(),
                     (errmsg("could not create pipe for syslog: %m"))));
    }
#else
    if (!syslogPipe[0])
    {
        SECURITY_ATTRIBUTES sa;

        memset(&sa, 0, sizeof(SECURITY_ATTRIBUTES));
        sa.nLength = sizeof(SECURITY_ATTRIBUTES);
        sa.bInheritHandle = TRUE;

        if (!CreatePipe(&syslogPipe[0], &syslogPipe[1], &sa, 32768))
            ereport(FATAL,
                    (errcode_for_file_access(),
                     (errmsg("could not create pipe for syslog: %m"))));
    }
#endif

    /*
     * Create log directory if not present; ignore errors
     */
    mkdir(Log_directory, S_IRWXU);

    /*
     * The initial logfile is created right in the postmaster, to verify that
     * the Log_directory is writable.  We save the reference time so that the
     * syslogger child process can recompute this file name.
     *
     * It might look a bit strange to re-do this during a syslogger restart,
     * but we must do so since the postmaster closed syslogFile after the
     * previous fork (and remembering that old file wouldn't be right anyway).
     * Note we always append here, we won't overwrite any existing file.  This
     * is consistent with the normal rules, because by definition this is not
     * a time-based rotation.
     */
    first_syslogger_file_time = time(NULL);
    filename = logfile_getname(first_syslogger_file_time, NULL);

    syslogFile = logfile_open(filename, "a", false);

    pfree(filename);

#ifdef EXEC_BACKEND
    switch ((sysloggerPid = syslogger_forkexec()))
#else
    switch ((sysloggerPid = fork_process()))
#endif
    {
        case -1:
            ereport(LOG,
                    (errmsg("could not fork system logger: %m")));
            return 0;

#ifndef EXEC_BACKEND
        case 0:
            /* in postmaster child ... */
            InitPostmasterChild();

            /* Close the postmaster's sockets */
            ClosePostmasterPorts(true);

            /* Drop our connection to postmaster's shared memory, as well */
            dsm_detach_all();
            PGSharedMemoryDetach();

            /* do the work */
            SysLoggerMain(0, NULL);
            break;
#endif

        default:
            /* success, in postmaster */

            /* now we redirect stderr, if not done already */
            if (!redirection_done)
            {
#ifdef WIN32
                int            fd;
#endif

                /*
                 * Leave a breadcrumb trail when redirecting, in case the user
                 * forgets that redirection is active and looks only at the
                 * original stderr target file.
                 */
                ereport(LOG,
                        (errmsg("redirecting log output to logging collector process"),
                         errhint("Future log output will appear in directory \"%s\".",
                                 Log_directory)));

#ifndef WIN32
                fflush(stdout);
                if (dup2(syslogPipe[1], fileno(stdout)) < 0)
                    ereport(FATAL,
                            (errcode_for_file_access(),
                             errmsg("could not redirect stdout: %m")));
                fflush(stderr);
                if (dup2(syslogPipe[1], fileno(stderr)) < 0)
                    ereport(FATAL,
                            (errcode_for_file_access(),
                             errmsg("could not redirect stderr: %m")));
                /* Now we are done with the write end of the pipe. */
                close(syslogPipe[1]);
                syslogPipe[1] = -1;
#else

                /*
                 * open the pipe in binary mode and make sure stderr is binary
                 * after it's been dup'ed into, to avoid disturbing the pipe
                 * chunking protocol.
                 */
                fflush(stderr);
                fd = _open_osfhandle((intptr_t) syslogPipe[1],
                                     _O_APPEND | _O_BINARY);
                if (dup2(fd, _fileno(stderr)) < 0)
                    ereport(FATAL,
                            (errcode_for_file_access(),
                             errmsg("could not redirect stderr: %m")));
                close(fd);
                _setmode(_fileno(stderr), _O_BINARY);

                /*
                 * Now we are done with the write end of the pipe.
                 * CloseHandle() must not be called because the preceding
                 * close() closes the underlying handle.
                 */
                syslogPipe[1] = 0;
#endif
                redirection_done = true;
            }

            /* postmaster will never write the file; close it */
            fclose(syslogFile);
            syslogFile = NULL;
            return (int) sysloggerPid;
    }

    /* we should never reach here */
    return 0;
}


#ifdef EXEC_BACKEND

/*
 * syslogger_forkexec() -
 *
 * Format up the arglist for, then fork and exec, a syslogger process
 */
static pid_t
syslogger_forkexec(void)
{
    char       *av[10];
    int            ac = 0;
    char        filenobuf[32];

    av[ac++] = "postgres";
    av[ac++] = "--forklog";
    av[ac++] = NULL;            /* filled in by postmaster_forkexec */

    /* static variables (those not passed by write_backend_variables) */
#ifndef WIN32
    if (syslogFile != NULL)
        snprintf(filenobuf, sizeof(filenobuf), "%d",
                 fileno(syslogFile));
    else
        strcpy(filenobuf, "-1");
#else                            /* WIN32 */
    if (syslogFile != NULL)
        snprintf(filenobuf, sizeof(filenobuf), "%ld",
                 (long) _get_osfhandle(_fileno(syslogFile)));
    else
        strcpy(filenobuf, "0");
#endif                            /* WIN32 */
    av[ac++] = filenobuf;

    av[ac] = NULL;
    Assert(ac < lengthof(av));

    return postmaster_forkexec(ac, av);
}

/*
 * syslogger_parseArgs() -
 *
 * Extract data from the arglist for exec'ed syslogger process
 */
static void
syslogger_parseArgs(int argc, char *argv[])
{
    int            fd;

    Assert(argc == 4);
    argv += 3;

#ifndef WIN32
    fd = atoi(*argv++);
    if (fd != -1)
    {
        syslogFile = fdopen(fd, "a");
        setvbuf(syslogFile, NULL, PG_IOLBF, 0);
    }
#else                            /* WIN32 */
    fd = atoi(*argv++);
    if (fd != 0)
    {
        fd = _open_osfhandle(fd, _O_APPEND | _O_TEXT);
        if (fd > 0)
        {
            syslogFile = fdopen(fd, "a");
            setvbuf(syslogFile, NULL, PG_IOLBF, 0);
        }
    }
#endif                            /* WIN32 */
}
#endif                            /* EXEC_BACKEND */


/* --------------------------------
 *        pipe protocol handling
 * --------------------------------
 */

/*
 * Process data received through the syslogger pipe.
 *
 * This routine interprets the log pipe protocol which sends log messages as
 * (hopefully atomic) chunks - such chunks are detected and reassembled here.
 *
 * The protocol has a header that starts with two nul bytes, then has a 16 bit
 * length, the pid of the sending process, and a flag to indicate if it is
 * the last chunk in a message. Incomplete chunks are saved until we read some
 * more, and non-final chunks are accumulated until we get the final chunk.
 *
 * All of this is to avoid 2 problems:
 * . partial messages being written to logfiles (messes rotation), and
 * . messages from different backends being interleaved (messages garbled).
 *
 * Any non-protocol messages are written out directly. These should only come
 * from non-PostgreSQL sources, however (e.g. third party libraries writing to
 * stderr).
 *
 * logbuffer is the data input buffer, and *bytes_in_logbuffer is the number
 * of bytes present.  On exit, any not-yet-eaten data is left-justified in
 * logbuffer, and *bytes_in_logbuffer is updated.
 */
static void
process_pipe_input(char *logbuffer, int *bytes_in_logbuffer)
{// #lizard forgives
    char       *cursor = logbuffer;
    int            count = *bytes_in_logbuffer;
    int            dest = LOG_DESTINATION_STDERR;

    /* While we have enough for a header, process data... */
    while (count >= (int) (offsetof(PipeProtoHeader, data) + 1))
    {
        PipeProtoHeader p;
        int            chunklen;

        /* Do we have a valid header? */
        memcpy(&p, cursor, offsetof(PipeProtoHeader, data));
        if (p.nuls[0] == '\0' && p.nuls[1] == '\0' &&
            p.len > 0 && p.len <= PIPE_MAX_PAYLOAD &&
            p.pid != 0 &&
            (p.is_last == 't' || p.is_last == 'f' ||
             p.is_last == 'T' || p.is_last == 'F'))
        {
            List       *buffer_list;
            ListCell   *cell;
            save_buffer *existing_slot = NULL,
                       *free_slot = NULL;
            StringInfo    str;

            chunklen = PIPE_HEADER_SIZE + p.len;

            /* Fall out of loop if we don't have the whole chunk yet */
            if (count < chunklen)
                break;

            dest = (p.is_last == 'T' || p.is_last == 'F') ?
                LOG_DESTINATION_CSVLOG : LOG_DESTINATION_STDERR;

            /* Locate any existing buffer for this source pid */
            buffer_list = buffer_lists[p.pid % NBUFFER_LISTS];
            foreach(cell, buffer_list)
            {
                save_buffer *buf = (save_buffer *) lfirst(cell);

                if (buf->pid == p.pid)
                {
                    existing_slot = buf;
                    break;
                }
                if (buf->pid == 0 && free_slot == NULL)
                    free_slot = buf;
            }

            if (p.is_last == 'f' || p.is_last == 'F')
            {
                /*
                 * Save a complete non-final chunk in a per-pid buffer
                 */
                if (existing_slot != NULL)
                {
                    /* Add chunk to data from preceding chunks */
                    str = &(existing_slot->data);
                    appendBinaryStringInfo(str,
                                           cursor + PIPE_HEADER_SIZE,
                                           p.len);
                }
                else
                {
                    /* First chunk of message, save in a new buffer */
                    if (free_slot == NULL)
                    {
                        /*
                         * Need a free slot, but there isn't one in the list,
                         * so create a new one and extend the list with it.
                         */
                        free_slot = palloc(sizeof(save_buffer));
                        buffer_list = lappend(buffer_list, free_slot);
                        buffer_lists[p.pid % NBUFFER_LISTS] = buffer_list;
                    }
                    free_slot->pid = p.pid;
                    str = &(free_slot->data);
                    initStringInfo(str);
                    appendBinaryStringInfo(str,
                                           cursor + PIPE_HEADER_SIZE,
                                           p.len);
                }
            }
            else
            {
                /*
                 * Final chunk --- add it to anything saved for that pid, and
                 * either way write the whole thing out.
                 */
                if (existing_slot != NULL)
                {
                    str = &(existing_slot->data);
                    appendBinaryStringInfo(str,
                                           cursor + PIPE_HEADER_SIZE,
                                           p.len);
                    write_syslogger_file(str->data, str->len, dest);
                    /* Mark the buffer unused, and reclaim string storage */
                    existing_slot->pid = 0;
                    pfree(str->data);
                }
                else
                {
                    /* The whole message was one chunk, evidently. */
                    write_syslogger_file(cursor + PIPE_HEADER_SIZE, p.len,
                                         dest);
                }
            }

            /* Finished processing this chunk */
            cursor += chunklen;
            count -= chunklen;
        }
        else
        {
            /* Process non-protocol data */

            /*
             * Look for the start of a protocol header.  If found, dump data
             * up to there and repeat the loop.  Otherwise, dump it all and
             * fall out of the loop.  (Note: we want to dump it all if at all
             * possible, so as to avoid dividing non-protocol messages across
             * logfiles.  We expect that in many scenarios, a non-protocol
             * message will arrive all in one read(), and we want to respect
             * the read() boundary if possible.)
             */
            for (chunklen = 1; chunklen < count; chunklen++)
            {
                if (cursor[chunklen] == '\0')
                    break;
            }
            /* fall back on the stderr log as the destination */
            write_syslogger_file(cursor, chunklen, LOG_DESTINATION_STDERR);
            cursor += chunklen;
            count -= chunklen;
        }
    }

    /* We don't have a full chunk, so left-align what remains in the buffer */
    if (count > 0 && cursor != logbuffer)
        memmove(logbuffer, cursor, count);
    *bytes_in_logbuffer = count;
}

/*
 * Force out any buffered data
 *
 * This is currently used only at syslogger shutdown, but could perhaps be
 * useful at other times, so it is careful to leave things in a clean state.
 */
static void
flush_pipe_input(char *logbuffer, int *bytes_in_logbuffer)
{
    int            i;

    /* Dump any incomplete protocol messages */
    for (i = 0; i < NBUFFER_LISTS; i++)
    {
        List       *list = buffer_lists[i];
        ListCell   *cell;

        foreach(cell, list)
        {
            save_buffer *buf = (save_buffer *) lfirst(cell);

            if (buf->pid != 0)
            {
                StringInfo    str = &(buf->data);

                write_syslogger_file(str->data, str->len,
                                     LOG_DESTINATION_STDERR);
                /* Mark the buffer unused, and reclaim string storage */
                buf->pid = 0;
                pfree(str->data);
            }
        }
    }

    /*
     * Force out any remaining pipe data as-is; we don't bother trying to
     * remove any protocol headers that may exist in it.
     */
    if (*bytes_in_logbuffer > 0)
        write_syslogger_file(logbuffer, *bytes_in_logbuffer,
                             LOG_DESTINATION_STDERR);
    *bytes_in_logbuffer = 0;
}


/* --------------------------------
 *        logfile routines
 * --------------------------------
 */

/*
 * Write text to the currently open logfile
 *
 * This is exported so that elog.c can call it when am_syslogger is true.
 * This allows the syslogger process to record elog messages of its own,
 * even though its stderr does not point at the syslog pipe.
 */
void
write_syslogger_file(const char *buffer, int count, int destination)
{
    int            rc;
    FILE       *logfile;

    if (destination == LOG_DESTINATION_CSVLOG && csvlogFile == NULL)
        open_csvlogfile();

    logfile = destination == LOG_DESTINATION_CSVLOG ? csvlogFile : syslogFile;
    rc = fwrite(buffer, 1, count, logfile);

    /* can't use ereport here because of possible recursion */
    if (rc != count)
        write_stderr("could not write to log file: %s\n", strerror(errno));
}

#ifdef WIN32

/*
 * Worker thread to transfer data from the pipe to the current logfile.
 *
 * We need this because on Windows, WaitforMultipleObjects does not work on
 * unnamed pipes: it always reports "signaled", so the blocking ReadFile won't
 * allow for SIGHUP; and select is for sockets only.
 */
static unsigned int __stdcall
pipeThread(void *arg)
{// #lizard forgives
    char        logbuffer[READ_BUF_SIZE];
    int            bytes_in_logbuffer = 0;

    for (;;)
    {
        DWORD        bytesRead;
        BOOL        result;

        result = ReadFile(syslogPipe[0],
                          logbuffer + bytes_in_logbuffer,
                          sizeof(logbuffer) - bytes_in_logbuffer,
                          &bytesRead, 0);

        /*
         * Enter critical section before doing anything that might touch
         * global state shared by the main thread. Anything that uses
         * palloc()/pfree() in particular are not safe outside the critical
         * section.
         */
        EnterCriticalSection(&sysloggerSection);
        if (!result)
        {
            DWORD        error = GetLastError();

            if (error == ERROR_HANDLE_EOF ||
                error == ERROR_BROKEN_PIPE)
                break;
            _dosmaperr(error);
            ereport(LOG,
                    (errcode_for_file_access(),
                     errmsg("could not read from logger pipe: %m")));
        }
        else if (bytesRead > 0)
        {
            bytes_in_logbuffer += bytesRead;
            process_pipe_input(logbuffer, &bytes_in_logbuffer);
        }

        /*
         * If we've filled the current logfile, nudge the main thread to do a
         * log rotation.
         */
        if (Log_RotationSize > 0)
        {
            if (ftell(syslogFile) >= Log_RotationSize * 1024L ||
                (csvlogFile != NULL && ftell(csvlogFile) >= Log_RotationSize * 1024L))
                SetLatch(MyLatch);
        }
        LeaveCriticalSection(&sysloggerSection);
    }

    /* We exit the above loop only upon detecting pipe EOF */
    pipe_eof_seen = true;

    /* if there's any data left then force it out now */
    flush_pipe_input(logbuffer, &bytes_in_logbuffer);

    /* set the latch to waken the main thread, which will quit */
    SetLatch(MyLatch);

    LeaveCriticalSection(&sysloggerSection);
    _endthread();
    return 0;
}
#endif                            /* WIN32 */

/*
 * Open the csv log file - we do this opportunistically, because
 * we don't know if CSV logging will be wanted.
 *
 * This is only used the first time we open the csv log in a given syslogger
 * process, not during rotations.  As with opening the main log file, we
 * always append in this situation.
 */
static void
open_csvlogfile(void)
{
    char       *filename;

    filename = logfile_getname(time(NULL), ".csv");

    csvlogFile = logfile_open(filename, "a", false);

    if (last_csv_file_name != NULL) /* probably shouldn't happen */
        pfree(last_csv_file_name);

    last_csv_file_name = filename;

    update_metainfo_datafile();
}

/*
 * Open a new logfile with proper permissions and buffering options.
 *
 * If allow_errors is true, we just log any open failure and return NULL
 * (with errno still correct for the fopen failure).
 * Otherwise, errors are treated as fatal.
 */
static FILE *
logfile_open(const char *filename, const char *mode, bool allow_errors)
{
    FILE       *fh;
    mode_t        oumask;

    /*
     * Note we do not let Log_file_mode disable IWUSR, since we certainly want
     * to be able to write the files ourselves.
     */
    oumask = umask((mode_t) ((~(Log_file_mode | S_IWUSR)) & (S_IRWXU | S_IRWXG | S_IRWXO)));
    fh = fopen(filename, mode);
    umask(oumask);

    if (fh)
    {
        setvbuf(fh, NULL, PG_IOLBF, 0);

#ifdef WIN32
        /* use CRLF line endings on Windows */
        _setmode(_fileno(fh), _O_TEXT);
#endif
    }
    else
    {
        int            save_errno = errno;

        ereport(allow_errors ? LOG : FATAL,
                (errcode_for_file_access(),
                 errmsg("could not open log file \"%s\": %m",
                        filename)));
        errno = save_errno;
    }

    return fh;
}

/*
 * perform logfile rotation
 */
static void
logfile_rotate(bool time_based_rotation, int size_rotation_for)
{// #lizard forgives
    char       *filename;
    char       *csvfilename = NULL;
    pg_time_t    fntime;
    FILE       *fh;

    rotation_requested = false;

    /*
     * When doing a time-based rotation, invent the new logfile name based on
     * the planned rotation time, not current time, to avoid "slippage" in the
     * file name when we don't do the rotation immediately.
     */
    if (time_based_rotation)
        fntime = next_rotation_time;
    else
        fntime = time(NULL);
    filename = logfile_getname(fntime, NULL);
    if (csvlogFile != NULL)
        csvfilename = logfile_getname(fntime, ".csv");

    /*
     * Decide whether to overwrite or append.  We can overwrite if (a)
     * Log_truncate_on_rotation is set, (b) the rotation was triggered by
     * elapsed time and not something else, and (c) the computed file name is
     * different from what we were previously logging into.
     *
     * Note: last_file_name should never be NULL here, but if it is, append.
     */
    if (time_based_rotation || (size_rotation_for & LOG_DESTINATION_STDERR))
    {
        if (Log_truncate_on_rotation && time_based_rotation &&
            last_file_name != NULL &&
            strcmp(filename, last_file_name) != 0)
            fh = logfile_open(filename, "w", true);
        else
            fh = logfile_open(filename, "a", true);

        if (!fh)
        {
            /*
             * ENFILE/EMFILE are not too surprising on a busy system; just
             * keep using the old file till we manage to get a new one.
             * Otherwise, assume something's wrong with Log_directory and stop
             * trying to create files.
             */
            if (errno != ENFILE && errno != EMFILE)
            {
                ereport(LOG,
                        (errmsg("disabling automatic rotation (use SIGHUP to re-enable)")));
                rotation_disabled = true;
            }

            if (filename)
                pfree(filename);
            if (csvfilename)
                pfree(csvfilename);
            return;
        }

        fclose(syslogFile);
        syslogFile = fh;

        /* instead of pfree'ing filename, remember it for next time */
        if (last_file_name != NULL)
            pfree(last_file_name);
        last_file_name = filename;
        filename = NULL;
    }

    /* Same as above, but for csv file. */

    if (csvlogFile != NULL &&
        (time_based_rotation || (size_rotation_for & LOG_DESTINATION_CSVLOG)))
    {
        if (Log_truncate_on_rotation && time_based_rotation &&
            last_csv_file_name != NULL &&
            strcmp(csvfilename, last_csv_file_name) != 0)
            fh = logfile_open(csvfilename, "w", true);
        else
            fh = logfile_open(csvfilename, "a", true);

        if (!fh)
        {
            /*
             * ENFILE/EMFILE are not too surprising on a busy system; just
             * keep using the old file till we manage to get a new one.
             * Otherwise, assume something's wrong with Log_directory and stop
             * trying to create files.
             */
            if (errno != ENFILE && errno != EMFILE)
            {
                ereport(LOG,
                        (errmsg("disabling automatic rotation (use SIGHUP to re-enable)")));
                rotation_disabled = true;
            }

            if (filename)
                pfree(filename);
            if (csvfilename)
                pfree(csvfilename);
            return;
        }

        fclose(csvlogFile);
        csvlogFile = fh;

        /* instead of pfree'ing filename, remember it for next time */
        if (last_csv_file_name != NULL)
            pfree(last_csv_file_name);
        last_csv_file_name = csvfilename;
        csvfilename = NULL;
    }

    if (filename)
        pfree(filename);
    if (csvfilename)
        pfree(csvfilename);

    update_metainfo_datafile();

    set_next_rotation_time();
}


/*
 * construct logfile name using timestamp information
 *
 * If suffix isn't NULL, append it to the name, replacing any ".log"
 * that may be in the pattern.
 *
 * Result is palloc'd.
 */
static char *
logfile_getname(pg_time_t timestamp, const char *suffix)
{
    char       *filename;
    int            len;

    filename = palloc(MAXPGPATH);

    snprintf(filename, MAXPGPATH, "%s/", Log_directory);

    len = strlen(filename);

    /* treat Log_filename as a strftime pattern */
    pg_strftime(filename + len, MAXPGPATH - len, Log_filename,
                pg_localtime(&timestamp, log_timezone));

    if (suffix != NULL)
    {
        len = strlen(filename);
        if (len > 4 && (strcmp(filename + (len - 4), ".log") == 0))
            len -= 4;
        strlcpy(filename + len, suffix, MAXPGPATH - len);
    }

    return filename;
}

/*
 * Determine the next planned rotation time, and store in next_rotation_time.
 */
static void
set_next_rotation_time(void)
{
    pg_time_t    now;
    struct pg_tm *tm;
    int            rotinterval;

    /* nothing to do if time-based rotation is disabled */
    if (Log_RotationAge <= 0)
        return;

    /*
     * The requirements here are to choose the next time > now that is a
     * "multiple" of the log rotation interval.  "Multiple" can be interpreted
     * fairly loosely.  In this version we align to log_timezone rather than
     * GMT.
     */
    rotinterval = Log_RotationAge * SECS_PER_MINUTE;    /* convert to seconds */
    now = (pg_time_t) time(NULL);
    tm = pg_localtime(&now, log_timezone);
    now += tm->tm_gmtoff;
    now -= now % rotinterval;
    now += rotinterval;
    now -= tm->tm_gmtoff;
    next_rotation_time = now;
}

/*
 * Store the name of the file(s) where the log collector, when enabled, writes
 * log messages.  Useful for finding the name(s) of the current log file(s)
 * when there is time-based logfile rotation.  Filenames are stored in a
 * temporary file and which is renamed into the final destination for
 * atomicity.
 */
static void
update_metainfo_datafile(void)
{// #lizard forgives
    FILE       *fh;

    if (!(Log_destination & LOG_DESTINATION_STDERR) &&
        !(Log_destination & LOG_DESTINATION_CSVLOG))
    {
        if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
            ereport(LOG,
                    (errcode_for_file_access(),
                     errmsg("could not remove file \"%s\": %m",
                            LOG_METAINFO_DATAFILE)));
        return;
    }

    if ((fh = logfile_open(LOG_METAINFO_DATAFILE_TMP, "w", true)) == NULL)
    {
        ereport(LOG,
                (errcode_for_file_access(),
                 errmsg("could not open file \"%s\": %m",
                        LOG_METAINFO_DATAFILE_TMP)));
        return;
    }

    if (last_file_name && (Log_destination & LOG_DESTINATION_STDERR))
    {
        if (fprintf(fh, "stderr %s\n", last_file_name) < 0)
        {
            ereport(LOG,
                    (errcode_for_file_access(),
                     errmsg("could not write file \"%s\": %m",
                            LOG_METAINFO_DATAFILE_TMP)));
            fclose(fh);
            return;
        }
    }

    if (last_csv_file_name && (Log_destination & LOG_DESTINATION_CSVLOG))
    {
        if (fprintf(fh, "csvlog %s\n", last_csv_file_name) < 0)
        {
            ereport(LOG,
                    (errcode_for_file_access(),
                     errmsg("could not write file \"%s\": %m",
                            LOG_METAINFO_DATAFILE_TMP)));
            fclose(fh);
            return;
        }
    }
    fclose(fh);

    if (rename(LOG_METAINFO_DATAFILE_TMP, LOG_METAINFO_DATAFILE) != 0)
        ereport(LOG,
                (errcode_for_file_access(),
                 errmsg("could not rename file \"%s\" to \"%s\": %m",
                        LOG_METAINFO_DATAFILE_TMP, LOG_METAINFO_DATAFILE)));
}

/* --------------------------------
 *        signal handler routines
 * --------------------------------
 */

/* SIGHUP: set flag to reload config file */
static void
sigHupHandler(SIGNAL_ARGS)
{
    int            save_errno = errno;

    got_SIGHUP = true;
    SetLatch(MyLatch);

    errno = save_errno;
}

/* SIGUSR1: set flag to rotate logfile */
static void
sigUsr1Handler(SIGNAL_ARGS)
{
    int            save_errno = errno;

    rotation_requested = true;
    SetLatch(MyLatch);

    errno = save_errno;
}
